Many of the figures and analysis methodology were based off of this tutorial from NYU. https://learn.gencore.bio.nyu.edu/rna-seq-analysis/gene-set-enrichment-analysis/

DESeq2 Differential Expression Analysis

# Read Count Matrix
count.matrix <- read_tsv(trimws(sample_params$salmon_merged_gene_counts_file_path)) %>%
  mutate(across(3:ncol(.), ~ as.integer(.x))) %>%
  column_to_rownames("gene_id")

gene.reference <- dplyr::select(count.matrix, 1)

count.matrix <- count.matrix %>%
  dplyr::select(-1)
# Design ColData Matrix
sample <- colnames(count.matrix)
colData <- data.frame(sample)

# Add Columns for Condition to colData
for (i in colnames(metadata)){
  colData[i] <- metadata[i]
}

colData <- column_to_rownames(colData, "sample")
# Create DESeq Data Set
dds <- DESeqDataSetFromMatrix(
  countData = count.matrix,
  colData = colData,
  design = as.formula(sample_params$design)
)

# Set Reference Level
reflevelcond <- trimws(sample_params$ref_level_cond)
reflevelvalue <- trimws(sample_params$ref_level_value)
dds[[reflevelcond]] <- relevel(dds[[reflevelcond]], ref = reflevelvalue)

# Subset out genes with low overall counts
keep <- rowSums(counts(dds) >= 10) >= 4
dds <- dds[keep,]

# Run DESeq
dds <- DESeq(dds)

# Regularized Log Transform
rld <- rlog(dds)
if (as.logical(trimws(sample_params$batch_correct))) {
  
  # Check that condition for batch correction matches column metadata
  if(all(trimws(sample_params$batch_cond) != colnames(metadata))){
    stop(sprintf("The given condition for batch correction:%s, doesn't match any prior conditions: %s", sample_params$batch_cond, colnames(metadata)))
  }
  
  # Replicate / Batch Correction
  batch <- trimws(sample_params$batch_cond)
  
  batch.correct <- ComBat_seq(
    counts = as.matrix(count.matrix),
    batch = metadata[[batch]],
    group = metadata[[colnames(metadata)[colnames(metadata) != batch]]]) %>%
    as.data.frame()

  # Create DESeq Data Set
  batch.correct.dds <- DESeqDataSetFromMatrix(
    countData = batch.correct,
    colData = colData,
    design = as.formula(sample_params$design))
  
  # Set Factor Level
  reflevelcond <- trimws(sample_params$ref_level_cond)
  reflevelvalue <- trimws(sample_params$ref_level_value)
  batch.correct.dds[[reflevelcond]] <- relevel(batch.correct.dds[[reflevelcond]], ref = reflevelvalue)
  
  # Subset out genes with low overall counts
  keep <- rowSums(counts(batch.correct.dds) >= 10) >= 4
  batch.correct.dds <- batch.correct.dds[keep,]
  
  # Run DESeq
  batch.correct.dds <- DESeq(batch.correct.dds)
  
  # Regularized Log Transform
  batch.correct.rld <- rlog(batch.correct.dds)
  
  # Cleanup
  rm(batch, batch.correct)
}

# Cleanup
rm(keep)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
#  Logic here uses the reference level condition and value inputs to create   #
#  contrast terms assumed to compare treatment samples with control samples   #

# # Find relevant contrasts
contrasts <- list()

# Factor values of experimental condition
pattern <- factor(metadata[[reflevelcond]])
comp <- levels(pattern)

# Remove control state from experimental condition vector
comp <- comp[comp != reflevelvalue]

# Create Contrasts
for (i in 1:length(comp)){
  contrasts[[i]] <- c(reflevelcond, comp[i], reflevelvalue)
}
# Pull results from DESeq object with contrasts
for (i in 1:length(contrasts)){
  results[[i]] <- list()
  results[[i]][["DataFrame"]] <- results(dds.for.analysis, contrast = contrasts[[i]]) %>%
    data.frame() %>%
    rownames_to_column("gene_id") %>%
    mutate(gene_name = gene.reference[gene_id, 1], .before=baseMean) %>% # Add gene names to DESeq results matrices
    column_to_rownames("gene_id")
  
  results[[i]][["DESeqDataSet"]] <- results(dds.for.analysis, contrast = contrasts[[i]])
}

Principle Component Analysis

# View non-batch corrected
pca <- visualizePCsByCondition(
  rlog = rld,
  metadata = colData,
  title = "Before Replicate Batch Correction")

pca$logScaleScree

pca$linearScaleScree

pca$selectedPCs

# View Rep effect corrected
if(as.logical(trimws(sample_params$batch_correct))){
pca.batch <- visualizePCsByCondition(
    rlog = batch.correct.rld,
    metadata = colData,
    title = "After Replicate Batch Correction")

print(pca.batch$logScaleScree)
print(pca.batch$linearScaleScree)
print(pca.batch$selectedPCs)
}

strain_hrde1_vs_wt

Inputs for function call:

compileReport(result.obj = results[[1]], contrast = sprintf("%s_%s_vs_%s", 
    contrasts[[i]][1], contrasts[[i]][2], contrasts[[i]][3]), 
    l2fc_filter = 0.8, padj_filter = 0.1) 

DEG Report

Ontology Report

Transfering WORMBASE gene IDs to ENTREZID:

273 gene IDs were not transfered from WORMBASE to ENTREZID 
11510 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.1) is:

470

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.1) is:

361

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.1) is:

109

Dysregulated

Reactome Pathway GSEA

Translation

Mitochondrial translation

Mitochondrial translation termination

Mitochondrial translation elongation

SRP-dependent cotranslational protein targeting to membrane

Eukaryotic Translation Initiation

Cap-dependent Translation Initiation

GTP hydrolysis and joining of the 60S ribosomal subunit

L13a-mediated translational silencing of Ceruloplasmin expression

Formation of a pool of free 40S subunits

Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)

Metabolism of RNA

Aerobic respiration and respiratory electron transport

Nonsense-Mediated Decay (NMD)

Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)

GO GSEA

protein phosphorylation

phosphorylation

protein dephosphorylation

ribonucleoprotein complex biogenesis

dephosphorylation

ribosome biogenesis

tRNA metabolic process

defense response to symbiont

innate immune response

immune response

immune system process

male gamete generation

rRNA metabolic process

rRNA processing

developmental process involved in reproduction

RNA modification

mitochondrial translation

collagen and cuticulin-based cuticle development

mitochondrial gene expression

tRNA processing

spermatogenesis

cell fate commitment

response to biotic stimulus

response to external biotic stimulus

biological process involved in interspecies interaction between organisms

response to other organism

defense response

sexual reproduction

cuticle development

gamete generation

Upregulated

Table Reactome Pathway Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Table Reactome Pathway Over-representation Analysis of Downregulated Genes has no rows.

strain_hrde2_vs_wt

Inputs for function call:

compileReport(result.obj = results[[1]], contrast = sprintf("%s_%s_vs_%s", 
    contrasts[[i]][1], contrasts[[i]][2], contrasts[[i]][3]), 
    l2fc_filter = 0.8, padj_filter = 0.1) 

DEG Report

Ontology Report

Transfering WORMBASE gene IDs to ENTREZID:

273 gene IDs were not transfered from WORMBASE to ENTREZID 
11510 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.1) is:

470

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.1) is:

361

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.1) is:

109

Dysregulated

Reactome Pathway GSEA

Translation

Mitochondrial translation

Mitochondrial translation elongation

Mitochondrial translation termination

SRP-dependent cotranslational protein targeting to membrane

L13a-mediated translational silencing of Ceruloplasmin expression

Eukaryotic Translation Initiation

Cap-dependent Translation Initiation

GTP hydrolysis and joining of the 60S ribosomal subunit

Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)

Formation of a pool of free 40S subunits

Metabolism of RNA

Respiratory electron transport

Nonsense-Mediated Decay (NMD)

Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)

Aerobic respiration and respiratory electron transport

mRNA Splicing - Major Pathway

Processing of Capped Intron-Containing Pre-mRNA

Formation of the Early Elongation Complex

ROS and RNS production in phagocytes

RNA Polymerase II Promoter Escape

RNA Polymerase II Transcription Pre-Initiation And Promoter Opening

RNA Polymerase II Transcription Initiation

RNA Polymerase II Transcription Initiation And Promoter Clearance

PI Metabolism

Amino acids regulate mTORC1

Cellular response to starvation

RNA Pol II CTD phosphorylation and interaction with CE

DNA Damage/Telomere Stress Induced Senescence

Iron uptake and transport

GO GSEA

phosphorylation

protein phosphorylation

protein dephosphorylation

ribonucleoprotein complex biogenesis

dephosphorylation

ribosome biogenesis

tRNA metabolic process

immune response

immune system process

innate immune response

defense response to symbiont

male gamete generation

rRNA metabolic process

developmental process involved in reproduction

mitochondrial translation

tRNA processing

rRNA processing

cell fate commitment

defense response

mitochondrial gene expression

gamete generation

mitochondrial respiratory chain complex assembly

cuticle development

collagen and cuticulin-based cuticle development

RNA modification

spermatogenesis

sexual reproduction

defense response to other organism

response to biotic stimulus

response to external biotic stimulus

Upregulated

Table Reactome Pathway Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Table Reactome Pathway Over-representation Analysis of Downregulated Genes has no rows.